Abstract
This page accompanies our paper submission to the International Conference on Conceptual Modeling (ER 2017). The data, redacted to include information only relevant to this study and remove personally identifying information can be found here and here, availed here in compliance to clause “Anonymized data will be included as an appendix in the PIs major research report and potential publications of this research.” of informed consent. The models used can be found here.
# Read the data
# First data set
df1 <- read.csv("http://www.yorku.ca/liaskos/Papers/ER2017/Data/DataSet1.csv",header = TRUE,stringsAsFactors=FALSE)
# Second data set with revised second instrument.
df2 <- read.csv("http://www.yorku.ca/liaskos/Papers/ER2017/Data/DataSet2.csv",header = TRUE,stringsAsFactors=FALSE)
dfModels <- read.csv("Models.csv",header = TRUE,stringsAsFactors=FALSE)
#
# Deal with consistency (available for second bunhc only)
#
df2$Consistency1 <- df2$S1.R4 - df2$S1.R4.Duplicate
df2$Consistency2 <- df2$S2.R1 - df2$S2.R1.Duplicate
# Clean - Up
df2$S1.R4.Duplicate <- NULL
df2$S2.R1.Duplicate <- NULL
#
# Find Preferred Model from Number
#
dfM <- melt(dfModels,id.vars = c("Structure","Model"))
dfM$variable <- as.character(dfM$variable)
for (i in c(1,2,3,5,6,7,8,9,10)) {
for (j in 1:4) {
colu <- paste0("S",i,".R",j)
df2[,paste0(colu,".v2")] <- as.character(lapply(df2[,colu], FUN = function(x) dfM[(dfM$Structure == i) & (dfM$Model == j) & (round(dfM$value,2) == round(x,2)),c("variable")]))
}
}
#
# Consistency Filtering (2nd version only)
#
# Two of the models (S1R4 and S2S1) are repeated. We filter responses
# for which we have a greater than .2 total distance between the replicated
# responses
df2<-df2[df2$Consistency1 + df2$Consistency2<0.2,]
#
# Merge the two data sets
#
# Clean Up to prepare for rbind
df2$Consistency1 <- NULL
df2$Consistency2 <- NULL
## Prepare trailing questions for rbind
df1$Meth1 <- "0"
df1$Meth.Details <- ""
df2$Str1Calculation <- ""
df2$Str1Calculation.Other. <- ""
df2$Response.ID <- df2$Response.ID + 2000
df<-rbind(df1, df2)
# Recode Various fields
dfOriginal <- df
# Map The Degree
df[df$Degre == "Master degree","Degree"] = "MSc"
df[df$Degre == "Bachelor degree","Degree"] = "BSc"
df[df$Degre == "PhD degree","Degree"] = "PhD"
df[df$Degre == "Other(MD)","Degree"] = "PhD"
# Map the Field of Study
df[df$FieldOfStudy == "Humanities (e.g. English, Philosophy, History)","FieldOfStudy"] = "Humanities"
df[df$FieldOfStudy == "Social Sciences (e.g. Sociology, Political Science, Communication)","FieldOfStudy"] = "Social Sciences"
df[df$FieldOfStudy == "Science, Technology and Engineering","FieldOfStudy"] = "Science and Tech."
df[df$FieldOfStudy == "Business and Economics","FieldOfStudy"] = "Business and Econ."
df[df$FieldOfStudy == "Other(Two of the above: social sciences and humanities)","FieldOfStudy"] = "Social Sciences"
df[df$FieldOfStudy == "Other(journalism)","FieldOfStudy"] = "Social Sciences"
#Re-code the trailing part
df[df$Str1Calculation == "0.8*0.8/(0.8+0.8) + 0.4*0.9/(0.4+0.9) = 0.64/1.8 + 0.36/1.3 = 0.63","StatedMethod"] = "SerPar"
df[df$Str1Calculation == "max(0.8*0.8, 0.4*0.9) = max(0.64, 0.36) = 0.64","StatedMethod"] = "Bayes"
df[df$Str1Calculation == "0.8*0.8 + 0.4*0.9 = 0.64 + 0.36 = 1.0","StatedMethod"] = "Linear"
df[df$Str1Calculation == "max ( min(0.8*0.8) , min(0.4*0.9) ) = max(0.8 , 0.4) = 0.8","StatedMethod"] = "MinMax"
df[df$Str1Calculation == "min(0.8,0.8) + min(0.4,0.9) = 0.8 + 0.4 = 1.2","StatedMethod"] = "Other"
df[df$Str1Calculation == "Other","StatedMethod"] = "Other"
# Based on last author's qualitative judgement of the "Other" field.
df[df$Response.ID %in% c(78,60),"StatedMethod"] = "Don't know"
df[df$Response.ID %in% c(2022,2031),"StatedMethod"] = "Linear"
df[df$Response.ID %in% c(2015,2021,2035),"StatedMethod"] = "Other"
# Recode for levels, styles.
levels <- list(L1 = c(1,2,3), L2 = c(5,6,7), L3 = c(8,9,10))
typ <- list(Rest = c(1,2), Free = c(3,4))
measures <- c("Bayes","Linear","MinMax","SerPar")
# Create the colunns
colist = NULL
for (l in c("L1","L2","L3")){
for (r in c("Free","Rest")){
for (m in measures){
colist = c(colist,paste0(l,".",r,".",m))
df[,c(paste0(l,".",r,".",m))] = 0
}
}
}
# Fill in the columns
for (i in 1:nrow(df)) { # For each case
for (l in c("L1","L2","L3")){ # For each Size
for (r in c("Free","Rest")){ # For each Style
for (m in measures){
targetColumn = paste0(l,".",r,".",m)
bag <- NULL
for (x in levels[[l]]){
for (y in typ[[r]]){
bag = c(bag,df[i,paste0("S",x,".R",y,".v2")])
}
}
bag <- factor(bag,levels = measures)
t <- table(bag)
df[i,targetColumn] = t[[m]]
}
}
}
}
dfW <- df[,c("Response.ID","Age","Gender","Degree","FieldOfStudy",colist)]
dfL <- melt(dfW, id=c("Response.ID", "Age","Gender","Degree","FieldOfStudy"))
dfL$Level = substr(dfL$variable,1,2)
dfL$Style = substr(dfL$variable,4,7)
dfL$Model = substring(dfL$variable,9)
dfL$Level = factor(dfL$Level)
dfL$Style = factor(dfL$Style)
dfL$Model = factor(dfL$Model)
dd <- dfL
x<-xtabs( ~ FieldOfStudy + Gender,data = dfW)
addmargins(x)
## Gender
## FieldOfStudy Female Male Sum
## Business and Econ. 8 5 13
## Education 3 3 6
## Fine Arts (e.g. Music, Theater, Film) 2 3 5
## Health Sciences 1 1 2
## Humanities 8 3 11
## Science and Tech. 3 7 10
## Social Sciences 6 3 9
## Sum 31 25 56
ra <- t(apply(dfModels[,c("Bayes","MinMax","SerPar","Linear")],1,rank))
r<-apply(ra,MARGIN = 2, FUN = table)
Size = c("Small","Small","Small","Medium","Medium","Medium","Large","Large","Large")
Depth = c(1,1,1,2,2,2,3,3,3)
Goals = c(3,4,5,5,7,6,7,9,10)
Contributions = c(2,3,4,4,6,5,6,8,9)
s <- cbind(Size, Depth, Goals, Contributions)
ftable(s[order(Depth),])
## Size Depth Goals Contributions
##
## A Small 1 3 2
## B Small 1 4 3
## C Small 1 5 4
## D Medium 2 5 4
## E Medium 2 7 6
## F Medium 2 6 5
## G Large 3 7 6
## H Large 3 9 8
## I Large 3 10 9
#xtable(s[order(Depth),])
Each row in the table below is each of the model. Each column is each theory. The cell represents the ranking of the number resulting from applying the corresponding theory. For example if for a model SerPar gives 0.2, Bayes 0.4, MinMax 0.5 and Linear 0.3, (an unlikely contigence), the rankings will be respectivelly 1,3,4,2.
ra
## Bayes MinMax SerPar Linear
## [1,] 2 3 1 4
## [2,] 2 3 1 4
## [3,] 2 3 1 4
## [4,] 2 3 1 4
## [5,] 2 3 1 4
## [6,] 2 3 1 4
## [7,] 2 3 1 4
## [8,] 2 3 1 4
## [9,] 2 3 1 4
## [10,] 2 4 1 3
## [11,] 2 3 1 4
## [12,] 2 3 1 4
## [13,] 2 4 1 3
## [14,] 2 3 1 4
## [15,] 2 3 1 4
## [16,] 2 3 1 4
## [17,] 2 3 1 4
## [18,] 2 3 1 4
## [19,] 2 3 1 4
## [20,] 2 3 1 4
## [21,] 2 4 1 3
## [22,] 2 3 1 4
## [23,] 2 3 1 4
## [24,] 2 3 1 4
## [25,] 2 3 1 4
## [26,] 2 4 1 3
## [27,] 2 3 1 4
## [28,] 2 3 1 4
## [29,] 2 3 1 4
## [30,] 2 3 1 4
## [31,] 2 3 1 4
## [32,] 2 3 1 4
## [33,] 2 4 1 3
## [34,] 2 3 1 4
## [35,] 2 3 1 4
## [36,] 2 3 1 4
So we observe that Linear is always the largest number the vast majority of the times, MinMax is the second largest also for the vast majority of times, Bayes is always the second smallest and Serial-Parallel is aways the smallest.
par(mfrow=c(1,2))
dfN <- df
rec<-data.frame()
for (alt in c("greater","less","two.sided")) { # for each test type
for (j in c(1,2,3,5,6,7,8,9,10)){ # For each structure
for (k in c(1,2,3,4)) { # For each numberset
col <- paste0("S",j,".R",k,".v2")
c <- table(factor(dfN[, col], levels = c("Bayes","Linear","MinMax","SerPar")))
res <- lapply(c,function(x) binom.test(x,nrow(dfN),1/4,alternative = alt))
rec <- rbind(rec,cbind(Model = col,
Size = ifelse(j %in% c(1,2,3),
"Small", ifelse(j %in% c(5,6,7),"Medium","Large")),
Style = ifelse(k %in% c(1,2),
"Restricted", "Free"),
Direction = ifelse(alt=="two.sided","two.sided",ifelse(alt=="greater","More Preferred","Less Preferred")),
Bayes_p = as.numeric(as.character(res[[1]]$p.value)),
Linear_p = as.numeric(res[[2]]$p.value),
MinMax_p = as.numeric(res[[3]]$p.value),
SerPar_p = as.numeric(res[[4]]$p.value)
)
)
} # Numberset
} # Structure
} # Type of binomial
rec$Bayes_p <- as.numeric(as.character(rec$Bayes_p))
rec$Linear_p <- as.numeric(as.character(rec$Linear_p))
rec$MinMax_p <- as.numeric(as.character(rec$MinMax_p))
rec$SerPar_p <- as.numeric(as.character(rec$SerPar_p))
alpha = 0.05
rec$Bayes <- ifelse(rec$Bayes_p < alpha,1,0)
rec$Linear <- ifelse(rec$Linear_p < alpha,1,0)
rec$MinMax <- ifelse(rec$MinMax_p < alpha,1,0)
rec$SerPar <- ifelse(rec$SerPar_p < alpha,1,0)
rec$None <- ifelse((rec$Bayes==0)&(rec$Linear==0)&(rec$MinMax==0)&(rec$SerPar==0),1,0)
rec$Some <- ifelse((rec$Bayes==0)&(rec$Linear==0)&(rec$MinMax==0)&(rec$SerPar==0),0,1)
rec$Bayes_p <- NULL
rec$Linear_p <- NULL
rec$MinMax_p <- NULL
rec$SerPar_p <- NULL
recL = melt(rec,id.vars = c("Model","Size","Style","Direction"))
colnames(recL)[6] <- "Models"
colnames(recL)[5] <- "Theory"
d1 <- recL[recL$Theory %in% c("Some","None"),]
d1 <- d1[d1$Direction == "two.sided",]
d2 <- recL[recL$Theory %in% c("Bayes","Linear","MinMax","SerPar"),]
d2 <- d2[d2$Direction != "two.sided",]
ggplot(data = d1, aes(x=Size, fill=Theory, y=Models)) +
geom_bar(stat="identity") +
facet_wrap(~ Style) + ylab("# non-random choices") +
theme(text = element_text(size=20))
ggplot(data = d2, aes(x=Size, fill=Theory, y=Models)) +
geom_bar(stat="identity") +
facet_wrap(~ Direction + Style) +
ylab("# of non-random choices") +
theme(text = element_text(size=20))
getBinomP <- function(x,n,p,alt) {
r<-binom.test(x,n,p,alternative = alt)
return(r$p.value)
}
a<-aggregate(value ~ Response.ID + Model,data = dd, FUN = sum)
a$BinomTS_p <- lapply(a$value, FUN = getBinomP,36,1/4,"two.sided")
a$BinomG_p <- lapply(a$value, FUN = getBinomP,36,1/4,"g")
a$BinomL_p <- lapply(a$value, FUN = getBinomP,36,1/4,"l")
a$BinomTS <- ifelse(a$BinomTS_p < 0.05,1,0)
a$BinomG <- ifelse(a$BinomG_p < 0.05,1,0)
a$BinomL <- ifelse(a$BinomL_p < 0.05,1,0)
a$BinomN <- ifelse(a$BinomL+a$BinomG+a$BinomTS == 0,1,0)
b<-aggregate(BinomG ~ Response.ID + Model,data = a, FUN = sum)
b1<-table(b[b$BinomG==1,])
great<-apply(b1[,,1],2,sum)
c<-aggregate(BinomL ~ Response.ID + Model,data = a, FUN = sum)
c1<-table(c[c$BinomL==1,])
less<-apply(c1,2,sum)
n<-aggregate(BinomN ~ Response.ID,data = a, FUN = sum)
none<- nrow(n[n$BinomN==4,])
aS <- na.omit(merge(a,df[,c("Response.ID","StatedMethod")]))
agreed <- nrow(aS[(aS$StatedMethod == aS$Model) & (aS$BinomG ==1),])/length(unique(aS$Response.ID))
opposite <- nrow(aS[(aS$StatedMethod == aS$Model) & (aS$BinomL ==1),])/length(unique(aS$Response.ID))
great
## Bayes Linear MinMax SerPar
## 5 13 27 2
less
## Bayes Linear MinMax SerPar
## 9 12 4 35
none
## [1] 7
At the end of the first version of the instrument the participants are asked what method they follow through an example mode and calculations on the example model representing each of the theories, with the additional option to provide their own method. At the end of the second version of the instrument the participants are instead asked whether “[they] used a specific calculation method”, which they had to describe in their own words or if instead “[they] did not use a specific calculation method, [but] just used [their] intuition”.
Of the total of 30 responses coming from the first version and the 26 responses coming from the second one we identify 35 participants who forcibly (first instrument) or voluntarily (second instrument) identify a specific way of working through the aggregations. Most of those responses were the result of selection from the corresponding multiple choice question (1st version) while a smaller number is described verbally (1st and second version). The latter (8 cases total) are classified under one of the theories or special “Other” and “Don’t Know” categories through qualitative analysis of the response. The table below represents the distribution of the stated methods.
table(df$StatedMethod)
##
## Bayes Don't know Linear MinMax Other SerPar
## 4 2 8 2 8 11
There is a surprising perception that the Serial Parallel model is used, when it is not. As a matter of fact a mere 20% of those 35 participants states that they use a method that also happens to be the one they actually used with statistically significant consistency. A 25.71% actually states that they follow a theory they actually use less with statistically significant consistency.
The finding becomes less surprising by focusing on the second version of the instrument in which participants are not forced to choose a calculation method but are asked if they use their intuition rather than performing calculations. Out of the 26 responses 21 actually say they just use their intuition (no specific method).
In aggregate, we can observe below that free style (weights adding up to more than 1.5) there is a tenancy for participants to select Bayesian or MinMax model and not Linear and SerPar. For restricted style models on the other hand, participants tend to choose Linear much more than other theories, whose choice frequency is much less than expected.
dd$Theory <- dd$Model
x<-xtabs(value ~ Style + Theory,data = dd)
interp <- function(x) pmin(x/6,1)
residuals = (x - 1008/4)/sqrt(1008/4)
mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals = residuals, gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))
CrossTable(x,format="SPSS",prop.chisq = FALSE, prop.c = FALSE)
##
## Cell Contents
## |-------------------------|
## | Count |
## | Row Percent |
## | Total Percent |
## |-------------------------|
##
## Total Observations in Table: 2016
##
## | Theory
## Style | Bayes | Linear | MinMax | SerPar | Row Total |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Free | 301 | 175 | 430 | 102 | 1008 |
## | 29.861% | 17.361% | 42.659% | 10.119% | 50.000% |
## | 14.931% | 8.681% | 21.329% | 5.060% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Rest | 208 | 358 | 295 | 147 | 1008 |
## | 20.635% | 35.516% | 29.266% | 14.583% | 50.000% |
## | 10.317% | 17.758% | 14.633% | 7.292% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 509 | 533 | 725 | 249 | 2016 |
## -------------|-----------|-----------|-----------|-----------|-----------|
##
##
The model depth does not seem to affect the choice.
dd1 <- dd
dd1$Size <- dd1$Level
levels(dd1$Size) <- c("Small","Medium","Large")
interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ Size + Theory,data = dd1)
residuals = (x - 672/4)/sqrt(672/4)
mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals = residuals, gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))
CrossTable(x,format="SPSS",prop.chisq = FALSE, prop.c = FALSE)
##
## Cell Contents
## |-------------------------|
## | Count |
## | Row Percent |
## | Total Percent |
## |-------------------------|
##
## Total Observations in Table: 2016
##
## | Theory
## Size | Bayes | Linear | MinMax | SerPar | Row Total |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Small | 174 | 155 | 200 | 143 | 672 |
## | 25.893% | 23.065% | 29.762% | 21.280% | 33.333% |
## | 8.631% | 7.688% | 9.921% | 7.093% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Medium | 190 | 187 | 238 | 57 | 672 |
## | 28.274% | 27.827% | 35.417% | 8.482% | 33.333% |
## | 9.425% | 9.276% | 11.806% | 2.827% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Large | 145 | 191 | 287 | 49 | 672 |
## | 21.577% | 28.423% | 42.708% | 7.292% | 33.333% |
## | 7.192% | 9.474% | 14.236% | 2.431% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 509 | 533 | 725 | 249 | 2016 |
## -------------|-----------|-----------|-----------|-----------|-----------|
##
##
x<-xtabs(value ~ Size + Model,data = dd1[dd1$Style == "Free",])
residuals = (x - 336/4)/sqrt(336/4)
mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals = residuals,gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))
CrossTable(x,format="SPSS",prop.chisq = FALSE, prop.c = FALSE)
##
## Cell Contents
## |-------------------------|
## | Count |
## | Row Percent |
## | Total Percent |
## |-------------------------|
##
## Total Observations in Table: 1008
##
## | Model
## Size | Bayes | Linear | MinMax | SerPar | Row Total |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Small | 114 | 61 | 112 | 49 | 336 |
## | 33.929% | 18.155% | 33.333% | 14.583% | 33.333% |
## | 11.310% | 6.052% | 11.111% | 4.861% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Medium | 108 | 54 | 142 | 32 | 336 |
## | 32.143% | 16.071% | 42.262% | 9.524% | 33.333% |
## | 10.714% | 5.357% | 14.087% | 3.175% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Large | 79 | 60 | 176 | 21 | 336 |
## | 23.512% | 17.857% | 52.381% | 6.250% | 33.333% |
## | 7.837% | 5.952% | 17.460% | 2.083% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 301 | 175 | 430 | 102 | 1008 |
## -------------|-----------|-----------|-----------|-----------|-----------|
##
##
x<-xtabs(value ~ Size + Model,data = dd1[dd1$Style == "Rest",])
residuals = (x - 336/4)/sqrt(336/4)
mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals = residuals,gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))
CrossTable(x,format="SPSS",prop.chisq = FALSE, prop.c = FALSE)
##
## Cell Contents
## |-------------------------|
## | Count |
## | Row Percent |
## | Total Percent |
## |-------------------------|
##
## Total Observations in Table: 1008
##
## | Model
## Size | Bayes | Linear | MinMax | SerPar | Row Total |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Small | 60 | 94 | 88 | 94 | 336 |
## | 17.857% | 27.976% | 26.190% | 27.976% | 33.333% |
## | 5.952% | 9.325% | 8.730% | 9.325% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Medium | 82 | 133 | 96 | 25 | 336 |
## | 24.405% | 39.583% | 28.571% | 7.440% | 33.333% |
## | 8.135% | 13.194% | 9.524% | 2.480% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Large | 66 | 131 | 111 | 28 | 336 |
## | 19.643% | 38.988% | 33.036% | 8.333% | 33.333% |
## | 6.548% | 12.996% | 11.012% | 2.778% | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 208 | 358 | 295 | 147 | 1008 |
## -------------|-----------|-----------|-----------|-----------|-----------|
##
##
Nothing very substantial. Females prefer MinMax over Linear compared to Men.
interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ Gender + Model,data = dd)
res1 = (x[1,] - sum(x[1,])/4)/sqrt(sum(x[1,])/4)
res2 = (x[2,] - sum(x[2,])/4)/sqrt(sum(x[2,])/4)
mosaic(x,shade = TRUE,gp_args = list(interpolate = interp),labeling = labeling_values, residuals = rbind(res1,res2),gp_text = gpar(fontsize = 18), gp_labels = gpar(fontsize = 12))
interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ FieldOfStudy + Model ,data = dd)
res1 = (x[1,] - sum(x[1,])/4)/sqrt(sum(x[1,])/4)
res2 = (x[2,] - sum(x[2,])/4)/sqrt(sum(x[2,])/4)
res3 = (x[3,] - sum(x[3,])/4)/sqrt(sum(x[3,])/4)
res4 = (x[4,] - sum(x[4,])/4)/sqrt(sum(x[4,])/4)
res5 = (x[5,] - sum(x[5,])/4)/sqrt(sum(x[5,])/4)
res6 = (x[6,] - sum(x[6,])/4)/sqrt(sum(x[6,])/4)
res7 = (x[7,] - sum(x[7,])/4)/sqrt(sum(x[7,])/4)
mosaic(x,
shade = TRUE,
gp_args = list(interpolate = interp),
labeling = labeling_values,
rot_labels = c(left = -20),
residuals = rbind(res1,res2,res3,res4,res5,res6,res6),
labeling_args = list(offset_valnames = c(left = -4, top=0)),
gp_text = gpar(fontsize = 18),
gp_labels = gpar(fontsize = 12))
xtabs(value ~ FieldOfStudy + Model,data = dd)
## Model
## FieldOfStudy Bayes Linear MinMax SerPar
## Business and Econ. 103 133 178 54
## Education 70 56 70 20
## Fine Arts (e.g. Music, Theater, Film) 44 57 46 33
## Health Sciences 27 5 31 9
## Humanities 102 89 142 63
## Science and Tech. 79 114 133 34
## Social Sciences 84 79 125 36
x<-xtabs(value ~ FieldOfStudy + Theory ,data = dd[dd$Style =="Rest",])
res1 = (x[1,] - sum(x[1,])/4)/sqrt(sum(x[1,])/4)
res2 = (x[2,] - sum(x[2,])/4)/sqrt(sum(x[2,])/4)
res3 = (x[3,] - sum(x[3,])/4)/sqrt(sum(x[3,])/4)
res4 = (x[4,] - sum(x[4,])/4)/sqrt(sum(x[4,])/4)
res5 = (x[5,] - sum(x[5,])/4)/sqrt(sum(x[5,])/4)
res6 = (x[6,] - sum(x[6,])/4)/sqrt(sum(x[6,])/4)
res7 = (x[7,] - sum(x[7,])/4)/sqrt(sum(x[7,])/4)
mosaic(x,
shade = TRUE,
gp_args = list(interpolate = interp),
labeling = labeling_values,
rot_labels = c(left = -20),
residuals = rbind(res1,res2,res3,res4,res5,res6,res6),
labeling_args = list(offset_valnames = c(left = -4, top=0),
abbreviate_varnames = TRUE),
gp_text = gpar(fontsize = 18),
gp_labels = gpar(fontsize = 12))
xtabs(value ~ FieldOfStudy + Model,data = dd[dd$Style == "Rest",])
## Model
## FieldOfStudy Bayes Linear MinMax SerPar
## Business and Econ. 42 86 72 34
## Education 29 41 25 13
## Fine Arts (e.g. Music, Theater, Film) 15 35 28 12
## Health Sciences 16 2 9 9
## Humanities 47 62 51 38
## Science and Tech. 25 78 59 18
## Social Sciences 34 54 51 23
x<-xtabs(value ~ FieldOfStudy + Theory,data = dd[dd$Style =="Free",])
res1 = (x[1,] - sum(x[1,])/4)/sqrt(sum(x[1,])/4)
res2 = (x[2,] - sum(x[2,])/4)/sqrt(sum(x[2,])/4)
res3 = (x[3,] - sum(x[3,])/4)/sqrt(sum(x[3,])/4)
res4 = (x[4,] - sum(x[4,])/4)/sqrt(sum(x[4,])/4)
res5 = (x[5,] - sum(x[5,])/4)/sqrt(sum(x[5,])/4)
res6 = (x[6,] - sum(x[6,])/4)/sqrt(sum(x[6,])/4)
res7 = (x[7,] - sum(x[7,])/4)/sqrt(sum(x[7,])/4)
mosaic(x,
shade = TRUE,
gp_args = list(interpolate = interp),
labeling = labeling_values,
rot_labels = c(left = -20),
residuals = rbind(res1,res2,res3,res4,res5,res6,res6),
labeling_args = list(offset_valnames = c(left = -4, top=0)),
gp_text = gpar(fontsize = 18),
gp_labels = gpar(fontsize = 12))
xtabs(value ~ FieldOfStudy + Model,data = dd[dd$Style == "Free",])
## Model
## FieldOfStudy Bayes Linear MinMax SerPar
## Business and Econ. 61 47 106 20
## Education 41 15 45 7
## Fine Arts (e.g. Music, Theater, Film) 29 22 18 21
## Health Sciences 11 3 22 0
## Humanities 55 27 91 25
## Science and Tech. 54 36 74 16
## Social Sciences 50 25 74 13
… which is due to…
interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ FieldOfStudy + Gender,data = dd)
mosaic(x,
shade = TRUE,
gp_args = list(interpolate = interp),
labeling = labeling_values,
gp_text = gpar(fontsize = 18),
gp_labels = gpar(fontsize = 12),
rot_labels = c(left = -20)
)
x
## Gender
## FieldOfStudy Female Male
## Business and Econ. 288 180
## Education 108 108
## Fine Arts (e.g. Music, Theater, Film) 72 108
## Health Sciences 36 36
## Humanities 288 108
## Science and Tech. 108 252
## Social Sciences 216 108
interp <- function(x) pmin(x/6,1)
x<-xtabs(value ~ Age + Model + Style,data = dd)
cotabplot(~ Age + Model | Style,
data = x,
panel = cotab_coindep,
n = 5000,
rot_labels = c(left = -0),
labeling = labeling_values,
gp_text = gpar(fontsize = 20),
gp_labels = gpar(fontsize = 18),
labeling_args = list(offset_valnames = c(left = -4, top=0))
)
x
## , , Style = Free
##
## Model
## Age Bayes Linear MinMax SerPar
## 21-29 53 13 61 17
## 30-39 136 110 224 34
## 40-49 76 49 109 36
## 50-59 22 3 33 14
## 60 or older 14 0 3 1
##
## , , Style = Rest
##
## Model
## Age Bayes Linear MinMax SerPar
## 21-29 34 44 50 16
## 30-39 95 207 140 62
## 40-49 56 77 85 52
## 50-59 22 18 15 17
## 60 or older 1 12 5 0