---
title: "Preschool Randot Test/Retest Reliability"
author: "Jenny Read & Kathleen Vancleef"
date: "13 March 2019"
output:
  word_document: default
  html_document: default
editor_options:
  chunk_output_type: console
---

```{r setup, echo=FALSE}
knitr::opts_chunk$set(echo = TRUE)
rm(list=ls())
library(ggplot2)
library(stringr)
library(tidyr)
library(dplyr)
library(cowplot)
library(ggExtra)
library("ordinal")

datafile = "RandotPreschoolReliability.RData"
load(datafile)
# Give it a shorter name for convenience:
reldata = droplevels(WP2data_rel)

# For plotting
ggplot2::theme_set(theme_light())
palette = "Spectral"

# Some clean-up:
# One subject erroneously marked as girl in session 2:
reldata$INFO.Gender[reldata$Study.ID==17 & reldata$Session==2] = "b"
reldata$INFO.Gender = dplyr::recode(toupper(reldata$INFO.Gender), "B"="M", "G"="F", "NA"="unknown")

# Drop session 3 as that's where kids were tested with the Randot in vertical orientation
reldata = reldata[reldata$Session<3,]
reldata = select(reldata, -contains("Vertical"))

# Remove any who were not testable on first session, as I don't regard a change from
# non-testable to testable as reflecting poor reliability.
# NB the RAN.testable field refers to testability on FIRST session
reldata = reldata[reldata$RAN.testable=="Y",]

# Group kids into age groups based on age at FIRST session
jID = reldata$Study.ID[which(floor(reldata$AgeYears)==2 & reldata$Session==1)]
reldata$AgeGroup[reldata$Study.ID %in% jID] = "2yo"
jID = reldata$Study.ID[which(floor(reldata$AgeYears)==3 & reldata$Session==1)]
reldata$AgeGroup[reldata$Study.ID %in% jID] = "3yo"
jID = reldata$Study.ID[which(floor(reldata$AgeYears)==4 & reldata$Session==1)]
reldata$AgeGroup[reldata$Study.ID %in% jID] = "4yo"
jID = reldata$Study.ID[which(floor(reldata$AgeYears)==5 & reldata$Session==1)]
reldata$AgeGroup[reldata$Study.ID %in% jID] = "5yo"
jID = reldata$Study.ID[which((floor(reldata$AgeYears)==6 | floor(reldata$AgeYears)==7) & reldata$Session==1)]
reldata$AgeGroup[reldata$Study.ID %in% jID] = "6 & 7yo"
jID = reldata$Study.ID[which((floor(reldata$AgeYears)==10 | floor(reldata$AgeYears)==11) & reldata$Session==1)]
reldata$AgeGroup[reldata$Study.ID %in% jID] = "10 & 11yo"
reldata$AgeGroup = factor(as.factor(reldata$AgeGroup), levels=c("2yo","3yo","4yo","5yo","6 & 7yo","10 & 11yo"))

# Remove 2yos as I only have 4 and it's too few:
reldata = droplevels(reldata[reldata$AgeGroup!="2yo",])

# Record age at first session and days between sessions
for (jID in unique(reldata$Study.ID)) {
  reldata$AgeAtSession1[reldata$Study.ID==jID] = reldata$AgeYears[reldata$Study.ID==jID & reldata$Session==1]
  reldata$DaysBetweenSessions[reldata$Study.ID==jID] = reldata$date[reldata$Study.ID==jID & reldata$Session==2] -
    reldata$date[reldata$Study.ID==jID & reldata$Session==1]
}

# Remove any kids with more than 30 days between test sessions:
reldata = reldata[reldata$DaysBetweenSessions<30,]
count(reldata, AgeGroup)
nkids = length(unique(reldata$Study.ID))

# Only keep columns I am going to use
trimdata = reldata[,c("Study.ID","INFO.Gender","AgeGroup","AgeAtSession1","Session","RAN.stereo","DaysBetweenSessions")]
trimdata$RAN.stereo = as.numeric(as.character(trimdata$RAN.stereo))
trimdata = spread(trimdata, Session, RAN.stereo)
colnames(trimdata)[colnames(trimdata)=="1"] = "Randot1"
colnames(trimdata)[colnames(trimdata)=="2"] = "Randot2"

# Make a variable for binary pass/fail:
trimdata$RANPASS1 = trimdata$Randot1<5000
trimdata$RANPASS2 = trimdata$Randot2<5000

# Combine these into the binary outcomes
"Pass-Pass" trimdata$Binary[trimdata$RANPASS1==TRUE & trimdata$RANPASS2==FALSE] = "Pass-Fail" trimdata$Binary[trimdata$RANPASS1==FALSE & trimdata$RANPASS2==TRUE] = "Fail-Pass" trimdata$Binary[trimdata$RANPASS1==FALSE & trimdata$RANPASS2==FALSE] = "Fail-Fail" # Plot numbers in each class trimdata$Binary = factor(as.factor(trimdata$Binary),levels=c("Pass-Pass","Fail-Fail","Fail-Pass","Pass-Fail")) knitr::kable(trimdata %>% group_by(AgeGroup,Binary) %>% tally()) trimdata$Binary = factor(as.factor(trimdata$Binary),levels=c("Pass-Fail","Fail-Pass","Fail-Fail","Pass-Pass")) ``` A subset of children aged three and older were retested on Preschool Randot in a separate session within three weeks of the first test (max `r max(reldata$DaysBetweenSessions)`, mean `r round(mean(reldata$DaysBetweenSessions))` days). We compared results on the two sessions to calculate the test/retest reliability. For this analysis, we did not exclude any children because of visual conditions, but we did exclude children who were recorded as having worn optical correction on one session but not on the other, since we did not want to confound poor test reliability with changes in optical correction. We also excluded children who did not understand the test the first time, as we wanted to understand repeatability of results independent of changes in understanding. In total, this sample consisted of `r nrow(trimdata)` children from `r floor(min(reldata$AgeYears))` to `r floor(max(reldata$AgeYears))` years old. ```{r echo=FALSE} # Work out proprtions for plotting plotdata = trimdata %>% group_by(AgeGroup,Binary) %>% summarise(n=n()) %>% mutate(perc = 100*n / sum(n)) gR =ggplot(data=plotdata,aes(x=AgeGroup,fill=Binary)) + geom_col(aes(y = perc,group=Binary),color="black",size=1) + scale_fill_manual(values=c("red","purple","burlywood","lightgreen")) + labs(x="Age group",y="Percentage",fill="Test-Retest",title="Randot") + theme(axis.text = element_text(size=10)) gR tiff(filename="Fig_ReliabilityBinary.tiff", width=1900,height=1600,pointsize=30,compression="none",res=300) print(gR) dev.off() # Figure ignoring age nPassPass = sum(trimdata$Binary=="Pass-Pass") jPassFail = which(trimdata$Binary=="Pass-Fail") nPassFail = length(jPassFail) nPass800Fail = sum(trimdata$Randot1[jPassFail]==800) ``` Figure 3 shows the proportion of children in each situation, classified by their age-group on the first session. Overall `r round(nPassPass/nkids*100)`% of children passed Preschool Randot both times. Only `r round(nPassFail/nkids*100)`% (`r nPassFail` children) failed on the second session having previously passed. 
## Bland-Altman analysis

```{r echo=FALSE}
# Bland-Altman analysis of log-thresholds
BAdata = mutate(trimdata, Randot1=as.numeric(Randot1), Randot2=as.numeric(Randot2))
# Assign "nil" results (coded 5000) a notional value of 1600 arcsec
BAdata = BAdata %>% mutate_at(vars(Randot1, Randot2), ~ replace(., which(.==5000), 1600))
BAdata = mutate(BAdata,
                Mean    = (Randot1+Randot2)/2,
                Diff    = (Randot2-Randot1),
                MeanLog = (log10(Randot1) + log10(Randot2))/2,
                DiffLog = log10(Randot2) - log10(Randot1))

# Find bias (mean of differences) and LoA (1.96 * SD of differences)
bias = mean(BAdata$DiffLog)
LoA = 1.96*sd(BAdata$DiffLog)
t = t.test(log10(BAdata$Randot1), log10(BAdata$Randot2), paired=TRUE)
s = sd(BAdata$DiffLog)
n = nrow(BAdata)
estse = sqrt(3*s*s/n)      # estimated standard error on the limit of reliability
tvalue = qt(0.975, n-1)    # t statistic corresponding to a 95% CI
LoACIlo = LoA - tvalue * estse
LoACIhi = LoA + tvalue * estse

# Table of results by age group, filled in later; last row is all ages combined
BAtable = data.frame("Age group"=as.character(), "N"=as.integer(),
                     "log-thresholds"=as.character(), "thresholds"=as.character(),
                     "Spearman"=as.character(), "Pearson"=as.character(),
                     stringsAsFactors = FALSE)
jgrp = length(levels(BAdata$AgeGroup)) + 1   # row for all ages combined
BAtable[jgrp,"N"] = n
BAtable[jgrp,"Age.group"] = "All"
BAtable[jgrp,"log.thresholds"] = sprintf(" %3.2f (%3.2f to %3.2f)", LoA, LoACIlo, LoACIhi)
BAtable[jgrp,"thresholds"] = sprintf(" %3.2f (%3.2f to %3.2f)", 10^LoA, 10^LoACIlo, 10^LoACIhi)

# Correlations between the two sessions
spearman = DescTools::SpearmanRho(BAdata$Randot1, BAdata$Randot2, use="all.obs", conf.level=0.95)
spearmanrho = paste(formatC(spearman["rho"],digits=2), " (", formatC(spearman["lwr.ci"],digits=2),
                    "-", formatC(spearman["upr.ci"],digits=2), ")")
pearson = cor.test(log10(BAdata$Randot1), log10(BAdata$Randot2), use="all.obs", method="pearson")
pearsonrho = paste(formatC(pearson$estimate,digits=2), " (", formatC(pearson$conf.int[1],digits=2),
                   "-", formatC(pearson$conf.int[2],digits=2), ")")
BAtable[jgrp,"Spearman"] = spearmanrho
BAtable[jgrp,"Pearson"] = pearsonrho
```

To assess agreement in more detail, we first examined the correlation between scores on the two sessions. The Spearman correlation coefficient, which examines the ranking of scores, was `r spearmanrho`. To compute the Pearson correlation, following previous workers, we first replaced a "nil" score with a notional level of 1600 arcsec, i.e. one log-spaced level above the highest available score of 800 arcsec. The Pearson correlation coefficient between the log-thresholds on the two sessions was `r pearsonrho`. Both correlations were highly significant (p<1e-10).

We then carried out a Bland-Altman analysis. Following previous workers, we did the analysis on log-thresholds rather than thresholds, since log-thresholds are closer to normally distributed, again after assigning 1600 arcsec to "nil" results. The mean difference between results on the two sessions was `r formatC(bias,digits=3)` log-arcsec, or a factor of `r formatC(10^bias,digits=2)`, and this was significantly different from zero (p=`r t$p.value`, paired t-test). Thus, children tended to obtain a better score on the second session, presumably due to practice effects. Since this improvement is small compared with the variability between the two sessions, we neglect it in computing the reliability. We quantify reliability using the Bland-Altman 95% limit of reliability, where a value of L means that one can be 95% confident that the result of a second test will lie within +/- L of the first. For a normal distribution, this corresponds to 1.96 times the standard deviation of the differences.
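For reference, these quantities can be written out explicitly; this simply restates the calculation performed in the chunk above. Writing $d_i$ for the difference in log-thresholds (session 2 minus session 1) for child $i$, $s_d$ for the standard deviation of the $d_i$, and $n$ for the number of children:

$$
\bar d = \frac{1}{n}\sum_{i=1}^{n} d_i, \qquad
L = 1.96\, s_d, \qquad
\mathrm{SE}(L) \approx \sqrt{3 s_d^{2}/n}, \qquad
\text{95\% CI: } L \pm t_{0.975,\,n-1}\,\mathrm{SE}(L).
$$

Here $\bar d$ is the bias and $L$ the 95% limit of reliability; the standard-error approximation and confidence interval follow the Bland & Altman (1986) recipe described in the next paragraph.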
In our Preschool Randot data, this gives L = `r formatC(LoA,digits=2)` log10 arcsec, corresponding to a factor of `r formatC(10^LoA,digits=2)` in arcsec. For example, if a child scores 200 arcsec in the first session, their score in the second session could be anywhere between 50 arcsec and 800 arcsec without any change in their vision.

One can also compute the 95% confidence interval on the estimate of L. We follow Bland & Altman's (1986) recipe for this, estimating the standard error of the limits of reliability as $\sqrt{3s^2/n}$, where s is the standard deviation of the differences and n is the sample size. In this way we estimate a 95% confidence interval of `r formatC(LoACIlo,digits=2)` to `r formatC(LoACIhi,digits=2)` log10 arcsec (factors of `r formatC(10^LoACIlo,digits=2)` to `r formatC(10^LoACIhi,digits=2)`).
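To make the arithmetic behind the worked example explicit, the following minimal sketch (illustration only; the helper function `retest_range` is not part of the analysis and simply assumes the `LoA` value computed above) converts a first-session score into the 95% retest range implied by the limit of reliability:

```{r retest-range-sketch, echo=TRUE, eval=FALSE}
# Illustration only: since the limit of reliability L is on a log10 scale, it acts
# multiplicatively on thresholds. A first-session score of x arcsec could therefore
# lie anywhere between x / 10^L and x * 10^L on retest without any real change in vision.
retest_range = function(x_arcsec, L = LoA) {
  c(lower = x_arcsec / 10^L, upper = x_arcsec * 10^L)
}
retest_range(200)   # e.g. roughly 50 to 800 arcsec when 10^L is about 4
```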
```{r echo=FALSE}
# Find the possible means and differences of two Randot scores, for marking on the plot
Ranvals = c(40,60,100,200,400,800)
logRanvals = round(log10(Ranvals)*1e3)/1e3   # round to avoid floating-point errors
n = length(Ranvals)
Randiffs = rep(logRanvals,n)
Ranmeans = rep(logRanvals,n)
for (j in 1:n) {
  Randiffs[1:n + (j-1)*n] = Randiffs[1:n + (j-1)*n] - logRanvals[j]
  Ranmeans[1:n + (j-1)*n] = (Ranmeans[1:n + (j-1)*n] + logRanvals[j]) / 2
}
Ranmeans = sort(unique(Ranmeans))
Randiffs = sort(unique(Randiffs))

g = ggplot(data=BAdata, aes(x=MeanLog, y=DiffLog)) +
  geom_count(shape=21, fill="red") +
  scale_size_area(max_size=14) +
  scale_y_continuous(name = "Difference in log-thresholds (session 2 - session 1)", minor_breaks=Randiffs) +
  scale_x_continuous(name = "Mean log-threshold (log-arcsec)", minor_breaks=Ranmeans) +
  geom_hline(yintercept=0) +
  geom_hline(yintercept=bias, linetype="dashed") +
  geom_hline(yintercept=LoA, linetype="dotted") +
  geom_hline(yintercept=-LoA, linetype="dotted")
tiff(filename="Fig_BlandAltmanAll.tiff", width=1900, height=1600, pointsize=30, compression="none", res=300)
print(g)
dev.off()

# More elaborate plot with age group
g = ggplot(data=BAdata, aes(x=MeanLog, y=DiffLog)) +
  geom_point(aes(fill=AgeGroup, shape=AgeGroup), position=position_jitter(width=0.03, height=0.06), alpha=0.5) +
  scale_size_area(max_size=10) +
  scale_shape_manual(values=c(21:25)) +
  scale_y_continuous(name = "Difference in log-thresholds (log-arcsec, session 2 - session 1)", minor_breaks=Randiffs) +
  scale_x_continuous(name = "Mean log-threshold (log10 arcsec)", minor_breaks=Ranmeans)
```

### Dependence of reliability on age

```{r echo=FALSE}
g = ggplot(data=BAdata, aes(x=AgeGroup, y=DiffLog, fill=INFO.Gender)) +
  geom_point(position=position_jitter(width=0.15, height=0.05), alpha=0.5, shape=21, size=6) +
  scale_fill_brewer(palette, type="qual") +
  geom_hline(yintercept=0) +
  scale_y_continuous(name = "Difference in log-thresholds (session 2 - session 1)") +
  guides(fill=guide_legend("Sex")) +
  xlab("Age group")

# Mark the limit of reliability (and its CI) for each age group
for (AgeGroup in levels(BAdata$AgeGroup)) {
  jage = which(BAdata$AgeGroup==AgeGroup)          # indexes of kids in this age group
  jgrp = which(levels(BAdata$AgeGroup)==AgeGroup)  # which level we are doing
  x1 = jgrp - 0.5
  x2 = jgrp + 0.5
  n = length(jage)

  # LoA
  s = sd(BAdata$DiffLog[jage])
  LoA = 1.96*s
  estse = sqrt(3*s^2/n)     # estimated standard error on LoA
  tvalue = qt(0.975, n-1)   # t statistic corresponding to a 95% CI
  # CI:
  LoACIlo = 1.96*s - tvalue*estse
  LoACIhi = 1.96*s + tvalue*estse

  # Correlations
  spearman = DescTools::SpearmanRho(BAdata$Randot1[jage], BAdata$Randot2[jage], use="all.obs", conf.level=0.95)
  spearmanrho = paste(formatC(spearman["rho"],digits=2), " (", formatC(spearman["lwr.ci"],digits=2),
                      "-", formatC(spearman["upr.ci"],digits=2), ")")
  pearson = cor.test(log10(BAdata$Randot1[jage]), log10(BAdata$Randot2[jage]), use="all.obs", method="pearson")
  pearsonrho = paste(formatC(pearson$estimate,digits=2), " (", formatC(pearson$conf.int[1],digits=2),
                     "-", formatC(pearson$conf.int[2],digits=2), ")")

  y = LoA
  g = g + geom_segment(x=x1, xend=x2, y=y, yend=y, size=2) +
    geom_segment(x=x1, xend=x2, y=-y, yend=-y, size=2) +
    geom_segment(x=(x1+x2)/2, xend=(x1+x2)/2, y=y-tvalue*estse, yend=y+tvalue*estse) +
    geom_segment(x=(x1+x2)/2, xend=(x1+x2)/2, y=-y-tvalue*estse, yend=-y+tvalue*estse)

  BAtable[jgrp,"N"] = n
  BAtable[jgrp,"Age.group"] = as.character(AgeGroup)
  BAtable[jgrp,"log.thresholds"] = sprintf(" %3.2f (%3.2f to %3.2f)", LoA, LoACIlo, LoACIhi)
  BAtable[jgrp,"thresholds"] = sprintf(" %3.2f (%3.2f to %3.2f)", 10^LoA, 10^LoACIlo, 10^LoACIhi)
  BAtable[jgrp,"Spearman"] = spearmanrho
  BAtable[jgrp,"Pearson"] = pearsonrho
}
g
knitr::kable(BAtable)
tiff(filename="Fig_BlandAltmanAge.tiff", width=1900, height=1600, pointsize=30, compression="none", res=300)
print(g)
dev.off()
```

# Effect of age

```{r echo=FALSE}
# Linear regression on age
m = glm(abs(DiffLog) ~ AgeAtSession1 + INFO.Gender, data=BAdata)
s = summary(m)

# t-test a la Fawcett & Birch
t = t.test(BAdata$DiffLog[BAdata$AgeAtSession1<5], BAdata$DiffLog[BAdata$AgeAtSession1>=5], paired=FALSE)

# F test comparing the variance of differences in children under 5 vs 5 and over
v = var.test(BAdata$DiffLog[BAdata$AgeAtSession1<5], BAdata$DiffLog[BAdata$AgeAtSession1>=5])

# Fawcett & Birch 2000 data, read off their Fig 3 (although I can only find 99 kids, not 100)
FBunder5 = c(rep(0,43), rep(0.176,6), rep(0.301,6), rep(0.523,2), rep(0.602,2))
FB5up = c(rep(0,30), rep(0.176,4), rep(0.222,5), rep(0.301,1))
vFB = var.test(FBunder5, FB5up)
```

A linear regression of the absolute difference in scores against age, with sex as a covariate, revealed a highly significant decrease in absolute difference with age (p<0.001). Absolute differences were slightly higher for boys, though this difference was not significant (offset = `r formatC(m$coefficients["INFO.GenderM"],digits=2)`, p=`r formatC(s$coefficients["INFO.GenderM","Pr(>|t|)"],digits=2)`). The variance of the differences was `r formatC(v$statistic,digits=2)` times greater in the `r sum(BAdata$AgeAtSession1<5)` children aged under 5 at first test than in the `r sum(BAdata$AgeAtSession1>=5)` children aged 5 and over, and this difference was highly significant (p=`r v$p.value`). The same is true of Fawcett & Birch's data: the variance of their differences was `r formatC(vFB$statistic,digits=2)` times greater in children aged under 5 than in children aged 5 and over, again highly significant (p=`r vFB$p.value`).